{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "661d3e50",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.neural_network import MLPClassifier\n",
    "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n",
    "import rce\n",
    "from scipy.stats import sem\n",
    "import Datasets as DS"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "00605510",
   "metadata": {},
   "source": [
    "## User preferences\n",
    "\n",
    "Here, the user can choose the dataset (`'banknote'`, `'diabetes'`, or `'ionosphere'`), the number of factual instances, the time limit (in seconds), as well as the uncertainty set (`'l2'` or `'linf'`) and rho. Furthermore, the models that should be fit to the data can be specified in a dictionary."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4a3e804b",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Name of the loader function in the Datasets module: 'banknote', 'diabetes' or 'ionosphere'.\n",
    "dataset_name = 'banknote'\n",
    "# Number of factual instances to explain per model (rows 0 .. num_instances-1 of X).\n",
    "num_instances = 20\n",
    "# Time limit in seconds, forwarded to rce.generate.\n",
    "time_limit = 1000\n",
    "# rho, forwarded to rce.generate and used as the threshold when checking the returned solution.\n",
    "rho = 0.05\n",
    "# Uncertainty set: 'linf' or 'l2'.\n",
    "unc_type = 'linf'\n",
    "\n",
    "# Model family -> hyper-parameter values to sweep. Each value is used as:\n",
    "#   'linear': random_state of LogisticRegression\n",
    "#   'cart':   max_depth of DecisionTreeClassifier\n",
    "#   'rf':     n_estimators of RandomForestClassifier\n",
    "#   'gbm':    n_estimators of GradientBoostingClassifier\n",
    "#   'mlp':    hidden_layer_sizes of MLPClassifier\n",
    "clf_dict = {\n",
    "    'linear': [0],\n",
    "    'cart': [3, 5, 10],\n",
    "    'rf': [5, 10, 20, 50, 100],\n",
    "    'gbm': [5, 10, 20, 50, 100],\n",
    "    'mlp': [(10,), (10, 10, 10), (50,), (100,)],\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5fc4d32c",
   "metadata": {},
   "source": [
    "## Load dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cbf02000",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Look up the loader named after the chosen dataset in the Datasets module and call it\n",
    "# with the data folder.\n",
    "data = getattr(DS, dataset_name)\n",
    "df_raw = data('../data/')\n",
    "\n",
    "# Min-max scale every column (the 'Outcome' column included); the raw frame is kept\n",
    "# under its own name so re-reading this cell shows where `df` comes from.\n",
    "scaler = MinMaxScaler()\n",
    "df = pd.DataFrame(scaler.fit_transform(df_raw), columns=df_raw.columns)\n",
    "\n",
    "# Select the target by name and use every other column as a feature, so the split does\n",
    "# not silently depend on 'Outcome' being the last column of the frame.\n",
    "y = df['Outcome']\n",
    "X = df.drop(columns='Outcome')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b2b07454",
   "metadata": {},
   "source": [
    "## Robust counterfactual explanation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "36a26b00",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "def _mean_sem(values):\n",
    "    \"\"\"Return (mean, standard error of the mean) of `values`.\n",
    "\n",
    "    NaN is returned for the mean of an empty list and for the standard error of\n",
    "    fewer than two values, so the summary row can always be formatted without\n",
    "    calling numpy / scipy on samples that are too small.\n",
    "    \"\"\"\n",
    "    mean = np.mean(values) if len(values) > 0 else np.nan\n",
    "    err = sem(values) if len(values) > 1 else np.nan\n",
    "    return mean, err\n",
    "\n",
    "\n",
    "fnamefull = './results_%s/%s_results_%s.txt' % (dataset_name, dataset_name, unc_type)\n",
    "# open(..., 'a') does not create missing folders, so make sure the output folder exists.\n",
    "os.makedirs(os.path.dirname(fnamefull), exist_ok=True)\n",
    "\n",
    "# One result list per (model family, hyper-parameter) combination.\n",
    "model_keys = [(family, value) for family in clf_dict for value in clf_dict[family]]\n",
    "num_iterations_dict = {key: [] for key in model_keys}  # iterations of successful runs\n",
    "comp_time_dict = {key: [] for key in model_keys}       # computation time of successful runs\n",
    "dist_early_stops = {key: [] for key in model_keys}     # best distance found in early-stopped runs\n",
    "early_stops_iter = {key: [] for key in model_keys}     # iterations of early-stopped runs\n",
    "\n",
    "# NOTE(review): the result folder is spelled three ways below ('./results_<dataset>/',\n",
    "# '../experiments/results_<dataset>' and 'results_<dataset>'); presumably they coincide\n",
    "# when the notebook is run from the experiments folder -- confirm before moving it.\n",
    "\n",
    "for clf_type in clf_dict.keys():\n",
    "    for param in clf_dict[clf_type]:\n",
    "        key = (clf_type, param)\n",
    "\n",
    "        # Fit the model for this family / hyper-parameter on the full data set.\n",
    "        if clf_type == 'cart':\n",
    "            clf = DecisionTreeClassifier(max_depth=param).fit(X, y)\n",
    "        elif clf_type == 'rf':\n",
    "            clf = RandomForestClassifier(max_depth=3, random_state=0, n_estimators=param).fit(X, y)\n",
    "        elif clf_type == 'mlp':\n",
    "            clf = MLPClassifier(hidden_layer_sizes=param, activation='relu', random_state=0, max_iter=10000).fit(X, y)\n",
    "        elif clf_type == 'gbm':\n",
    "            clf = GradientBoostingClassifier(n_estimators=param, learning_rate=1.0, max_depth=2, random_state=0).fit(X, y)\n",
    "        elif clf_type == 'linear':\n",
    "            clf = LogisticRegression(random_state=param).fit(X, y)\n",
    "        else:\n",
    "            # Without this branch an unknown key would reuse the previous model (or hit a NameError).\n",
    "            raise ValueError('Unknown classifier type: %r' % (clf_type,))\n",
    "\n",
    "        # rce.generate is called with iterative=False for the linear model and True otherwise.\n",
    "        it = clf_type != 'linear'\n",
    "\n",
    "        for i in range(num_instances):\n",
    "            print(f'######## Iteration number: {i} ########')\n",
    "            np.random.seed(i)\n",
    "            # Factual instance: row i of X as a one-row DataFrame.\n",
    "            u = pd.DataFrame([X.iloc[i, :]])\n",
    "            u_pred = clf.predict(u)[0]\n",
    "\n",
    "            final_model, num_iterations, comp_time, x_, solutions_master_dict = rce.generate(\n",
    "                clf, X, y, '../experiments/results_%s' % dataset_name, clf_type, 'binary', u,\n",
    "                list(u.columns), [], [], {}, [], [], [], rho,\n",
    "                unc_type=unc_type, iterative=it, time_limit=time_limit)\n",
    "\n",
    "            if x_ is not None:\n",
    "                solution_subopt, dist = rce.find_maxrad(\n",
    "                    x_, clf_type, 'results_%s' % dataset_name, x_.columns,\n",
    "                    [], [], {}, [], [], [], u_pred, unc_type)\n",
    "\n",
    "            # Early stop: no solution was returned, or the distance reported by\n",
    "            # rce.find_maxrad falls short of rho by more than rho/100.\n",
    "            if x_ is None or dist + rho/100 < rho:\n",
    "                best_dist = 0\n",
    "                # Keep the largest distance over all stored master solutions. Dedicated\n",
    "                # loop names avoid shadowing the instance index `i` and the solution `x_`.\n",
    "                for k in range(len(solutions_master_dict)):\n",
    "                    x_k = solutions_master_dict[k]['sol']\n",
    "                    solution_subopt_k, dist_k = rce.find_maxrad(\n",
    "                        x_k, clf_type, 'results_%s' % dataset_name, x_k.columns,\n",
    "                        [], [], {}, [], [], [], u_pred, unc_type)\n",
    "                    if dist_k >= best_dist:\n",
    "                        best_dist = dist_k\n",
    "                print(best_dist)\n",
    "                dist_early_stops[key].append(best_dist)\n",
    "                early_stops_iter[key].append(num_iterations)\n",
    "                print('\\n\\n@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ ERROR @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\\n\\n')\n",
    "            else:\n",
    "                num_iterations_dict[key].append(num_iterations)\n",
    "                comp_time_dict[key].append(comp_time)\n",
    "\n",
    "        # Summarise this model once all instances are done and append one row to the .txt file:\n",
    "        # time mean (sem), iterations mean (sem), number of early stops,\n",
    "        # early-stop distance mean (sem), early-stop iterations mean (sem).\n",
    "        time_mean, time_sem = _mean_sem(comp_time_dict[key])\n",
    "        iter_mean, iter_sem = _mean_sem(num_iterations_dict[key])\n",
    "        stop_dist_mean, stop_dist_sem = _mean_sem(dist_early_stops[key])\n",
    "        stop_iter_mean, stop_iter_sem = _mean_sem(early_stops_iter[key])\n",
    "\n",
    "        txt = '{0}: \\t {1} \\t {2:.2f} ({3:.2f}) \\t {4:.2f} ({5:.2f}) \\t {6} \\t {7:.3f} ({8:.3f}) \\t {9:.2f} ({10:.2f}) '.format(\n",
    "            clf_type,\n",
    "            param,\n",
    "            time_mean,\n",
    "            time_sem,\n",
    "            iter_mean,\n",
    "            iter_sem,\n",
    "            len(dist_early_stops[key]),\n",
    "            stop_dist_mean,\n",
    "            stop_dist_sem,\n",
    "            stop_iter_mean,\n",
    "            stop_iter_sem)\n",
    "        with open(fnamefull, 'a') as f:\n",
    "            print(txt, file=f)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
